library(readxl)
library(corrplot)
library(lm.beta)
library(Hmisc)
library(qgraph)
library(plyr)
library(dplyr)
library(psych)
library(neurobase)
library(oro.nifti)
library(dplyr)
library(png)
library(ggiraphExtra)
library(data.table)
library(cowplot)
library(ggplot2)
library(patchwork)
# library(NMF)
library(ggbeeswarm)
#source("/Volumes/Users/nfranzme/R_Projects/R_Imaging/workbench/workbench_map_volume_to_surface#.R")
library(ggpubr)
library(lubridate)
library(reticulate)
library(svMisc)
library(NetworkToolbox)
library(readxl)
library(writexl)
library(lme4)
library(lmerTest)
library(stringr)
library(boot)
library(table1)

load input

define directories

# define directories: dir.root points at the course folder, everything else hangs off it
dir.root <- "/Volumes/Users/nfranzme/R_Projects/Tutorials/GSN_course/fMRI_Day2_Graph_Theory/"
dir.fc.200 <- paste0(dir.root, "/Schaefer200_functional_connectivity/")
dir.data.sheets <- paste0(dir.root, "/data_sheets/")

Data preparation

Step 1: load and prepare the data

The data frame contains data from the Alzheimer’s disease neuroimaging initiative (ADNI) database.
ADNI is a large multi-center study that focuses on multi-modal neuroimaging in Alzheimer’s disease patients, more information can be found here: http://adni.loni.usc.edu

Included imaging modalities in the current dataset are tau-PET, amyloid-PET and resting-state fMRI. We will subset the data to healthy controls (i.e. cognitively normal [CN] individuals without evidence for amyloid pathology [Ab.neg]), and subjects across the Alzheimer’s disease (AD) spectrum, defined as having abnormally elevated amyloid-beta (Ab) based on amyloid-PET. The AD spectrum includes individuals who are still cognitively normal (CN.Ab.pos, i.e. preclinical AD), subjects who show mild cognitive impairment (MCI.Ab.pos, i.e. prodromal AD), and patients with AD dementia (Dementia.Ab.pos)

For this course we will work primarily with cross-sectional data, stored in the “ADNI.data.bl” data frame. However, some individuals also have longitudinal tau-PET; you could have a look at those data if there is time left.

-> read in data and select subjects with longitudinal data

# read in ADNI data and drop one excluded participant
ADNI.data <- readxl::read_xlsx(paste0(dir.data.sheets, "/ADNI_fMRI_PET.xlsx"))
ADNI.data <- dplyr::filter(ADNI.data, ID != "sub-4431")

# merge diagnosis and amyloid status into one grouping factor,
# ordered from controls to AD dementia
dx.ab.levels <- c("CN.Ab.neg", "CN.Ab.pos", "MCI.Ab.pos", "Dementia.Ab.pos")
ADNI.data$DX.Ab <- factor(paste(ADNI.data$DX, ADNI.data$amyloid.global.SUVR.status, sep = "."),
                          levels = dx.ab.levels)

# subset to controls and subjects across the AD spectrum (CN = cognitively normal,
# MCI = Mild Cognitive Impairment, Dementia = Alzheimer's disease dementia);
# any other diagnosis/amyloid combination became NA above and is dropped here
ADNI.data <- dplyr::filter(ADNI.data, DX.Ab %in% dx.ab.levels)

# repoint the functional connectivity matrix paths from the cluster to the local course folder
ADNI.data$FC.mat.200 <- str_replace(ADNI.data$FC.mat.200, "/Network/Cluster/ADNI/functional_connectivity/Schaefer_200/", 
                                    paste0(dir.root, "/Schaefer200_functional_connectivity/"))

# keep baseline visits only (first tau-PET time point)
ADNI.data.bl <- dplyr::filter(ADNI.data, tau.pet.fu.yrs == 0)

-> create some overview tables

First, you should get an idea of the type of data we’re working with. So we will check some basic distributions of biomarker and cognitive data for the current patient cohort.

create “table 1”

We can first create an overview table of demographics, as well as amyloid-PET and tau-PET load, to
see how biomarkers are distributed across diagnostic groups.
We’ll look at the following variables (Variable names on the left, explanation on the right)

age = age
sex = sex
centiloid = global amyloid-PET level (a typical “cut-off” for abnormality is 20)
tau.global.SUVR = global tau-PET level (a typical “cut-off” for abnormality is 1.3)
MMSE = Mini Mental State Exam, i.e. a global cognitive screening instrument, where lower values indicate stronger cognitive impairment
ADNI_MEM = A memory composite score across several memory tests. Lower values indicate stronger impairment

table1(~ age + factor(sex) + centiloid + tau.global.SUVR + MMSE + ADNI_MEM | DX.Ab, data = ADNI.data.bl)
CN.Ab.neg
(N=193)
CN.Ab.pos
(N=67)
MCI.Ab.pos
(N=37)
Dementia.Ab.pos
(N=20)
Overall
(N=317)
age
Mean (SD) 72.1 (6.96) 74.8 (5.96) 75.9 (6.75) 80.3 (8.24) 73.7 (7.15)
Median [Min, Max] 71.0 [56.0, 92.0] 75.0 [65.0, 90.0] 76.0 [61.0, 90.0] 82.0 [62.0, 94.0] 73.0 [56.0, 94.0]
factor(sex)
F 119 (61.7%) 41 (61.2%) 19 (51.4%) 9 (45.0%) 188 (59.3%)
M 74 (38.3%) 26 (38.8%) 18 (48.6%) 11 (55.0%) 129 (40.7%)
centiloid
Mean (SD) -5.57 (12.8) 65.6 (34.7) 76.4 (31.7) 95.8 (41.6) 25.4 (46.0)
Median [Min, Max] -7.12 [-34.3, 21.9] 58.3 [23.2, 141] 74.6 [22.8, 142] 95.5 [44.2, 198] 6.74 [-34.3, 198]
tau.global.SUVR
Mean (SD) 1.06 (0.0741) 1.10 (0.0999) 1.21 (0.222) 1.37 (0.399) 1.11 (0.166)
Median [Min, Max] 1.06 [0.865, 1.45] 1.10 [0.904, 1.62] 1.15 [0.908, 1.83] 1.24 [0.971, 2.37] 1.08 [0.865, 2.37]
MMSE
Mean (SD) 29.3 (0.856) 28.7 (1.74) 26.7 (2.45) 22.9 (4.54) 27.6 (3.17)
Median [Min, Max] 30.0 [27.0, 30.0] 29.5 [24.0, 30.0] 27.0 [22.0, 30.0] 23.0 [14.0, 29.0] 29.0 [14.0, 30.0]
Missing 157 (81.3%) 49 (73.1%) 16 (43.2%) 7 (35.0%) 229 (72.2%)
ADNI_MEM
Mean (SD) 1.05 (0.564) 0.924 (0.527) 0.143 (0.584) -0.679 (0.693) 0.808 (0.741)
Median [Min, Max] 0.985 [-0.229, 3.14] 0.874 [-0.119, 1.94] 0.0840 [-0.964, 1.69] -0.531 [-2.39, 0.603] 0.836 [-2.39, 3.14]
Missing 1 (0.5%) 0 (0%) 0 (0%) 0 (0%) 1 (0.3%)

As you can see, amyloid and tau levels increase (i.e. become more abnormal) across more severe
disease levels. Also, MMSE and ADNI_MEM values decrease, indicating stronger cognitive impairment.

plot biomarker distributions

Next, we should look at biomarker distributions of amyloid and tau pathology,
to get an idea of how amyloid and tau relate to increasing disease severity.
We’ll use ggplot2 for plotting. Some useful ggplot2 tutorials can be found here:

https://ggplot2.tidyverse.org
http://r-statistics.co/Complete-Ggplot2-Tutorial-Part1-With-R-Code.html
http://r-statistics.co/ggplot2-Tutorial-With-R.html

1. Amyloid-PET

Let’s first create a boxplot/beeswarm plot to illustrate the amyloid distribution across different diagnostic groups. The variables we’ll look at are centiloid (i.e. amyloid-PET) and DX.Ab (i.e. diagnostic groups)

# plot group differences: amyloid-PET (centiloid) by diagnostic group
ggplot(data = ADNI.data.bl,
       aes(
         x = DX.Ab,
         y = centiloid)) +
  geom_boxplot(alpha = 0.1, notch = TRUE, width = 0.5) +
  geom_beeswarm(alpha = 0.4) +
  theme_minimal() +
  # dashed line at 20 centiloid, the conventional amyloid abnormality cut-off
  geom_hline(yintercept = 20, linetype = 2)

As you can see, amyloid levels increase across the AD spectrum. The dashed line indicates where amyloid becomes abnormal.

To statistically test these group differences, we can run an ANCOVA. It’s quite common standard to control for some usual covariates in clinical research, such as age and sex (and often also education)

So, lets quantify the group differences using an ANCOVA with a post-hoc Tukey Test. The ANCOVA tests whether there is an overall group-difference between diagnostic groups. The Post-Hoc Tukey test assesses the difference between single diagnostic groups.

# test group differences (ANCOVA), controlling for age and sex

## fit the ANCOVA model: centiloid by diagnostic group, adjusting for age and sex
tmp.aov <- aov(centiloid ~ DX.Ab + age + sex, data = ADNI.data.bl)

## summarize output
summary(tmp.aov)
##              Df Sum Sq Mean Sq F value Pr(>F)    
## DX.Ab         3 488804  162935 284.751 <2e-16 ***
## age           1   1071    1071   1.871  0.172    
## sex           1   1302    1302   2.275  0.132    
## Residuals   311 177955     572                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## run post-hoc Tukey test for pairwise differences between the diagnostic groups
TukeyHSD(tmp.aov, which = "DX.Ab")
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = centiloid ~ DX.Ab + age + sex, data = ADNI.data.bl)
## 
## $DX.Ab
##                                 diff       lwr       upr     p adj
## CN.Ab.pos-CN.Ab.neg         71.15427 62.393041  79.91551 0.0000000
## MCI.Ab.pos-CN.Ab.neg        81.98326 70.894598  93.07192 0.0000000
## Dementia.Ab.pos-CN.Ab.neg  101.40212 86.887992 115.91624 0.0000000
## MCI.Ab.pos-CN.Ab.pos        10.82898 -1.826335  23.48430 0.1226984
## Dementia.Ab.pos-CN.Ab.pos   30.24784 14.504318  45.99136 0.0000068
## Dementia.Ab.pos-MCI.Ab.pos  19.41886  2.270748  36.56697 0.0192694
2. Tau-PET

Let’s do the same for tau-PET, so lets first create a boxplot/beeswarm plot to illustrate the tau distribution across diagnostic groups

# plot group differences: global tau-PET SUVR by diagnostic group
ggplot(data = ADNI.data.bl,
       aes(
         x = DX.Ab,
         y = tau.global.SUVR)) +
  geom_boxplot(alpha = 0.1, notch = TRUE, width = 0.5) +
  geom_beeswarm(alpha = 0.4) +
  theme_minimal() +
  # dashed line at 1.3 SUVR, a typical tau abnormality cut-off
  geom_hline(yintercept = 1.3, linetype = 2)

As you can see, tau levels also increase across the AD spectrum. However, people usually start surpassing the threshold at symptomatic stages (MCI and Dementia). This is the case because amyloid precedes symptom onset by decades, while tau pathology is much closer to symptom onset. In fact, tau pathology is the key driver of neurodegeneration and cognitive decline, so you typically see abnormal tau first when people start to show symptoms.

To statistically test these group differences, we can again run an ANCOVA with a post-hoc Tukey Test

# test group differences (ANCOVA), controlling for age and sex

## fit the ANCOVA model: global tau SUVR by diagnostic group, adjusting for age and sex
tmp.aov <- aov(tau.global.SUVR ~ DX.Ab + age + sex, data = ADNI.data.bl)

## summarize output
summary(tmp.aov)
##              Df Sum Sq Mean Sq F value   Pr(>F)    
## DX.Ab         3  2.219  0.7396  38.088  < 2e-16 ***
## age           1  0.372  0.3718  19.145 1.66e-05 ***
## sex           1  0.089  0.0887   4.567   0.0334 *  
## Residuals   311  6.039  0.0194                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## run post-hoc Tukey test for pairwise differences between the diagnostic groups
TukeyHSD(tmp.aov, which = "DX.Ab")
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = tau.global.SUVR ~ DX.Ab + age + sex, data = ADNI.data.bl)
## 
## $DX.Ab
##                                  diff         lwr        upr     p adj
## CN.Ab.pos-CN.Ab.neg        0.03732495 -0.01371349 0.08836339 0.2348737
## MCI.Ab.pos-CN.Ab.neg       0.15133527  0.08673844 0.21593209 0.0000000
## Dementia.Ab.pos-CN.Ab.neg  0.31011331  0.22556150 0.39466512 0.0000000
## MCI.Ab.pos-CN.Ab.pos       0.11401032  0.04028695 0.18773368 0.0004684
## Dementia.Ab.pos-CN.Ab.pos  0.27278836  0.18107471 0.36450201 0.0000000
## Dementia.Ab.pos-MCI.Ab.pos 0.15877804  0.05888199 0.25867410 0.0003001
3. Staging of tau pathology

One thing to keep in mind is that the above described analyses on tau-PET are based on “global” tau levels. However, tau does not - in contrast to amyloid - accumulate globally throughout the brain, but rather follows a consecutive spreading pattern that typically starts in the inferior temporal lobe. This “spreading pattern” of tau pathology has been already described in the nineties by Braak & Braak.

The surface rendering shows the six Braak-stages. Abnormal tau pathology is typically found first in Stage 1 (Entorhinal cortex) and then spreads to the hippocampus (Stage 2), the inferior temporal and limbic cortex (Stages 3&4) and finally to the association and primary somatosensory cortex (Stages 5&6). The plot on the left illustrates the likelihood of tau abnormality within each Braak stage across a group of AD patients, showing that early Braak stages typically show earlier tau abnormality than later Braak stages. Note that we usually discard the hippocampus when looking at tau-PET, because the tau-PET tracer shows quite significant off-target binding in the choroid plexus which is right next to the hippocampus. Thus, tau-PET signal in the hippocampus is quite confounded, at least for first-generation tau-PET tracers like flortaucipir.

Next, lets look at the distribution of tau pathology in the separate Braak stage ROIs. Tau-PET within these Braak-stage ROIs is labeled as tau.braak1.SUVR, tau.braak3.SUVR, tau.braak4.SUVR ,etc.

# plot group differences for the single Braak-stage ROIs
# all five panels share the same layout, so build them with one helper
# (Braak stage 2 / hippocampus is skipped because of choroid-plexus off-target binding)
plot.braak.stage <- function(var.name, plot.title) {
  # boxplot + beeswarm of one tau Braak-stage SUVR column by diagnostic group,
  # with the 1.3 SUVR abnormality cut-off as a dashed line
  ggplot(data = ADNI.data.bl,
         aes(
           x = DX.Ab,
           y = .data[[var.name]])) +
    geom_boxplot(alpha = 0.1, notch = TRUE, width = 0.5) +
    geom_beeswarm(alpha = 0.4) +
    theme_minimal() +
    geom_hline(yintercept = 1.3, linetype = 2) +
    labs(y = var.name) +
    ggtitle(plot.title)
}

p1 <- plot.braak.stage("tau.braak1.SUVR", "Braak 1")
p2 <- plot.braak.stage("tau.braak3.SUVR", "Braak 3")
p3 <- plot.braak.stage("tau.braak4.SUVR", "Braak 4")
p4 <- plot.braak.stage("tau.braak5.SUVR", "Braak 5")
p5 <- plot.braak.stage("tau.braak6.SUVR", "Braak 6")

# stack the five panels vertically (patchwork operator)
p1 / p2 / p3 / p4 / p5

You can see that earlier Braak stage ROIs show abnormal signal earlier in the disease course than later Braak stage ROIs

4. Association between amyloid and tau

Our current understanding of Alzheimer’s disease suggests that amyloid-beta deposition is the first pathological event to happen, that triggers downstream tau accumulation, neurodegeneration and cognitive decline. Thus, tau and amyloid should be correlated. Let’s have a look:

# scatter of global amyloid (centiloid) against global tau (SUVR) with a linear fit
ggplot(ADNI.data.bl, aes(x = centiloid, y = tau.global.SUVR)) +
  geom_point(alpha = 0.5) +
  geom_smooth(method = "lm") +
  theme_minimal()
## `geom_smooth()` using formula 'y ~ x'

This shows that higher amyloid is clearly associated with higher tau pathology. We can quantify the association using correlation or linear regression controlling for age and sex

# Pearson correlation between amyloid and global tau
# (Hmisc::rcorr returns the correlation matrix, n, and p-values)
rcorr(ADNI.data.bl$centiloid,
      ADNI.data.bl$tau.global.SUVR)
##      x    y
## x 1.00 0.45
## y 0.45 1.00
## 
## n= 317 
## 
## 
## P
##   x  y 
## x     0
## y  0
# linear regression of global tau on amyloid, controlling for age and sex
tmp.lm <- lm(tau.global.SUVR ~ centiloid + age + sex, data = ADNI.data.bl)
summary(tmp.lm)
## 
## Call:
## lm(formula = tau.global.SUVR ~ centiloid + age + sex, data = ADNI.data.bl)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.28793 -0.06976 -0.01552  0.04633  1.13638 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.3774498  0.0874438  15.752  < 2e-16 ***
## centiloid    0.0018142  0.0001856   9.773  < 2e-16 ***
## age         -0.0041736  0.0012097  -3.450 0.000637 ***
## sexM        -0.0207299  0.0168531  -1.230 0.219608    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1456 on 313 degrees of freedom
## Multiple R-squared:  0.2393, Adjusted R-squared:  0.232 
## F-statistic: 32.83 on 3 and 313 DF,  p-value: < 2.2e-16

TASK 1

Now that you know how to run group comparisons (i.e. ANCOVAs) and correlations/regression (linear models), try to check whether amyloid and tau are associated with cognition (e.g. MMSE and ADNI_MEM). How do amyloid and tau relate to cognitive performance? Try to create figures and statistics showing differences in cognitive performance between diagnostic groups, and associations between amyloid, tau and cognition.

fMRI analysis

Next lets go have a look at some fMRI parameters. We have prepared functional connectivity data using a common 200 ROI cortical atlas (see https://pubmed.ncbi.nlm.nih.gov/28981612/). A surface rendering of the atlas can be found below, including the seven networks (DMN = Default-Mode, DAN = Dorsal Attention, VAN = Ventral Attention, FPCN = Fronto-parietal control, Visual, Motor, Limbic)

1. Importing connectivity matrices

Atlas based connectivity is usually stored in matrix format, also referred to as an adjacency matrix. For 200 ROIs this means that the resulting adjacency matrix is 200x200 elements large, where each element corresponds to the connectivity-metric between two ROIs (e.g. Fisher-z transformed Pearson correlation). All connectivity matrices are stored in the “Schaefer200_functional_connectivity” folder on the hard drive in .csv format. This folder is already stored in your R workspace as “dir.fc.200”. The paths to the matrices are stored in the “ADNI.data.bl” data frame in the variable “FC.mat.200”. Let's load an example to see what this looks like.

# import the first subject's connectivity matrix (.csv) using the read.csv command
current.adj.matrix = read.csv(ADNI.data.bl$FC.mat.200[1])

When checking the dimension of this matrix, we realize that there are 201 columns and 200 rows. The first column contains the rownames, so we need to remove it in order to get a symmetrical 200x200 matrix

# check dimensions: 201 columns because the first column holds the ROI row names
dim(current.adj.matrix)
## [1] 200 201
# remove first column to obtain a symmetrical 200x200 table
current.adj.matrix.cleaned <- current.adj.matrix[,-1]

# transform to matrix format (required by corrplot and the graph functions below)
current.adj.matrix.cleaned.matrix <- as.matrix(current.adj.matrix.cleaned)

# check dimensions
# NOTE(review): this re-checks the cleaned data frame, not the matrix created
# above — both should be 200x200, but checking the matrix would be more direct
dim(current.adj.matrix.cleaned)
## [1] 200 200

Now the connectivity matrix has 200 rows and 200 columns. Lets have a look at the overall connectivity pattern. You can plot connectivity matrices using the “corrplot” package.

# plot the full connectivity matrix
# diag = FALSE hides the diagonal, tl.pos = "n" suppresses ROI labels,
# is.corr = FALSE since connectivity values need not lie in [-1, 1]
corrplot::corrplot(current.adj.matrix.cleaned.matrix, 
                   diag = FALSE, 
                   tl.pos = "n", 
                   tl.cex = 0.5, 
                   method = "color", 
                   is.corr = FALSE)

You can see that the connectivity matrix is symmetrical, with positive correlations and negative correlations. Positive correlations (blue colors) mean that two brain regions show correlated BOLD signal, suggesting that they’re functionally connected. Negative correlations indicate anti-correlations, where one region is up-regulated and another one is down-regulated at the same time.

2. Preparing connectivity matrices for graph theoretical analyses

In order to perform graph theoretical analyses, we usually perform some additional preprocessing steps, e.g. we remove negative connections or perform thresholding of the matrix.

I’ve written a function that can perform some basic thresholding operations for you. The function below can be called from the command line and needs several input arguments: adj_matrix = a symmetrical adjacency matrix; retain_negatives = a boolean statement (TRUE/FALSE) indicating whether or not negative values should be retained; density = the fraction of connections that should be retained, where 1 indicates that 100% of connections should be retained and 0.3 means that only 30% of the strongest connections will be retained; replace_with_NAs = a boolean statement (TRUE/FALSE) indicating whether values that are eliminated from the matrix should be replaced with an NA (not assigned). Otherwise, values are replaced with a zero.

# define matrix thresholding function

#' Density-threshold a (symmetric) adjacency matrix.
#'
#' @param adj_matrix symmetric numeric adjacency matrix
#' @param retain_negatives logical; if FALSE, negative weights are set to 0
#'   BEFORE the density threshold is computed
#' @param density fraction of connections to retain (1 = all, 0.1 = strongest
#'   10%); the cutoff is the (1 - density) quantile of the absolute values of
#'   ALL matrix entries (including the diagonal)
#' @param replace_with_NAs logical; sub-threshold entries become NA if TRUE,
#'   otherwise 0
#' @return the thresholded adjacency matrix (same dimensions as the input)
adj_mat_density_thresh <- function(adj_matrix, retain_negatives, density, replace_with_NAs) {
  # optionally zero out negative connections first
  if (!isTRUE(retain_negatives)) {
    adj_matrix[adj_matrix < 0] <- 0
  }
  # absolute-value cutoff retaining the requested fraction of entries
  threshold <- quantile(abs(adj_matrix), 1 - density)
  # entries strictly below the cutoff are removed; if/else (instead of two
  # independent ifs) guarantees exactly one branch runs
  if (isTRUE(replace_with_NAs)) {
    adj_matrix[abs(adj_matrix) < threshold] <- NA
  } else {
    adj_matrix[abs(adj_matrix) < threshold] <- 0
  }

  adj_matrix
}

Next, lets threshold the matrix that we’ve imported previously and eliminate the negative values (i.e. retain_negatives = F), while keeping all positive values (i.e. density = 1). Everything else should be replaced with a zero.

Removing negative connections

# call the function and store the output in a new variable:
# drop negative connections but keep all positive ones (density = 1)
current.adj.matrix.cleaned.matrix.thr1 <- adj_mat_density_thresh(adj_matrix = current.adj.matrix.cleaned.matrix, 
                                                                retain_negatives = FALSE, 
                                                                density = 1, 
                                                                replace_with_NAs = FALSE)

Let’s see how the new matrix looks like:

# plot the matrix after removing negative connections (density = 1)
corrplot::corrplot(current.adj.matrix.cleaned.matrix.thr1, 
                   diag = FALSE, 
                   tl.pos = "n", 
                   tl.cex = 0.5, 
                   method = "color", 
                   is.corr = FALSE)

You can see that all the negative values have been eliminated, while the positive values have been kept.

Density thresholding

Next, lets see what thresholding does to the matrix. Now, we want to retain only 10% of the strongest positive connections (i.e. a density of 0.1)

# call the function and store the output in a new variable:
# retain only the 10% strongest positive connections (density = 0.1)
current.adj.matrix.cleaned.matrix.thr0p1 <- adj_mat_density_thresh(adj_matrix = current.adj.matrix.cleaned.matrix, 
                                                                retain_negatives = FALSE, 
                                                                density = 0.1, 
                                                                replace_with_NAs = FALSE)

Let’s see how the thresholded matrix looks like:

# plot the density-thresholded matrix (only the strongest 10% of connections remain)
corrplot::corrplot(current.adj.matrix.cleaned.matrix.thr0p1, 
                   diag = FALSE, 
                   tl.pos = "n", 
                   tl.cex = 0.5, 
                   method = "color", 
                   is.corr = FALSE)

You can see that the matrix looks quite sparse now and only the “backbone” of very strong connections is retained. Sometimes thresholding is done to eliminate “noisy” and “weak” connections. However, these weak connections have actually been shown to encode some important inter-individual information, as shown by papers on “connectome fingerprinting” (see work by Emily Finn https://www.nature.com/articles/nn.4135)

Binarization

While the matrix above is relatively sparse, it still includes information about the “strength” of a connection, which we refer to as a weighted matrix. Some researchers, however, use binary matrices, which just encode the presence or absence of a connection. This is often done for structural connectivity matrices. Binarization can be done quite easily using the NetworkToolbox. Let's binarize the matrix in which we’ve retained only 10% of the strongest connections.

# binarize: every surviving connection becomes 1, everything else stays 0
current.adj.matrix.cleaned.matrix.thr0p1.bin <- NetworkToolbox::binarize(current.adj.matrix.cleaned.matrix.thr0p1)

# plot the binarized matrix (presence/absence of a connection only)
corrplot::corrplot(current.adj.matrix.cleaned.matrix.thr0p1.bin, 
                   diag = FALSE, 
                   tl.pos = "n", 
                   tl.cex = 0.5, 
                   method = "color", 
                   is.corr = FALSE)

Now, all information about the strength of connections is lost.

3. Computing graph theoretical parameters

Next, lets use the connectivity matrix to compute some basic graph theoretical parameters.
For this, the “NetworkToolbox” is quite helpful, which comes with some easy to use commands (see https://cran.r-project.org/web/packages/NetworkToolbox/NetworkToolbox.pdf)

3.1 Clustering

The clustering coefficient quantifies the abundance of connected triangles in a network and is a major descriptive statistics of networks. This means, if a node is connected to two neighbours, and if these neighbours are also interconnected, they form a cluster (triangle), as illustrated in the example below.

So lets compute the clustering coefficient on our matrix. We can compute a “Global” clustering coefficient for the entire network, as well as a clustering coefficient for each single node in the network. To compute the clustering coefficient, we use the clustcoeff command from the NetworkToolbox. This command requires two inputs, an adjacency matrix and a TRUE/FALSE statement on whether the matrix is weighted or binary. Clustering is usually computed on networks with “positive” connections only, hence we need to use the matrix from which we have eliminated the negative connections called current.adj.matrix.cleaned.matrix.thr1 You can find information about this function by typing help(“clustcoeff”) in the console. We can compute the clustering coefficient for networks thresholded at a density of 1 and 0.1 to see how the clustering coefficient changes.

# compute the weighted clustering coefficient on the positive-only network,
# once at density 1 and once at density 0.1, to compare thresholds
tmp.clustering.weighted.thr1 <- clustcoeff(current.adj.matrix.cleaned.matrix.thr1, weighted = TRUE)
tmp.clustering.weighted.thr0p1 <- clustcoeff(current.adj.matrix.cleaned.matrix.thr0p1, weighted = TRUE)

The output contains two different measures, the global clustering coefficient CC, and the local clustering coefficient for each ROI called CCi. You can choose which one to look at by navigating with the $ sign.

# global clustering coefficient CC (one value per network)
tmp.clustering.weighted.thr1$CC
## [1] 0.18073
tmp.clustering.weighted.thr0p1$CC
## [1] 0.389075
# ROI-wise (local) clustering coefficient CCi, one value per node, density 1
tmp.clustering.weighted.thr1$CCi
##    X1    X2    X3    X4    X5    X6    X7    X8    X9   X10   X11   X12   X13 
## 0.183 0.151 0.187 0.202 0.108 0.188 0.161 0.201 0.224 0.229 0.199 0.194 0.195 
##   X14   X15   X16   X17   X18   X19   X20   X21   X22   X23   X24   X25   X26 
## 0.203 0.127 0.180 0.150 0.170 0.162 0.171 0.179 0.192 0.203 0.240 0.266 0.271 
##   X27   X28   X29   X30   X31   X32   X33   X34   X35   X36   X37   X38   X39 
## 0.286 0.278 0.296 0.269 0.189 0.164 0.171 0.178 0.172 0.170 0.191 0.242 0.169 
##   X40   X41   X42   X43   X44   X45   X46   X47   X48   X49   X50   X51   X52 
## 0.242 0.212 0.282 0.157 0.163 0.138 0.129 0.175 0.162 0.173 0.170 0.151 0.180 
##   X53   X54   X55   X56   X57   X58   X59   X60   X61   X62   X63   X64   X65 
## 0.189 0.291 0.160 0.168 0.162 0.212 0.206 0.156 0.143 0.176 0.162 0.129 0.138 
##   X66   X67   X68   X69   X70   X71   X72   X73   X74   X75   X76   X77   X78 
## 0.121 0.101 0.121 0.125 0.169 0.136 0.131 0.181 0.180 0.151 0.181 0.167 0.174 
##   X79   X80   X81   X82   X83   X84   X85   X86   X87   X88   X89   X90   X91 
## 0.191 0.167 0.187 0.150 0.155 0.071 0.164 0.130 0.142 0.143 0.096 0.140 0.207 
##   X92   X93   X94   X95   X96   X97   X98   X99  X100  X101  X102  X103  X104 
## 0.200 0.193 0.207 0.213 0.221 0.168 0.158 0.176 0.091 0.160 0.182 0.206 0.195 
##  X105  X106  X107  X108  X109  X110  X111  X112  X113  X114  X115  X116  X117 
## 0.204 0.133 0.143 0.133 0.199 0.188 0.146 0.182 0.187 0.192 0.172 0.164 0.153 
##  X118  X119  X120  X121  X122  X123  X124  X125  X126  X127  X128  X129  X130 
## 0.155 0.118 0.151 0.147 0.138 0.172 0.278 0.289 0.240 0.275 0.297 0.275 0.267 
##  X131  X132  X133  X134  X135  X136  X137  X138  X139  X140  X141  X142  X143 
## 0.246 0.296 0.251 0.282 0.211 0.158 0.182 0.253 0.227 0.182 0.208 0.242 0.236 
##  X144  X145  X146  X147  X148  X149  X150  X151  X152  X153  X154  X155  X156 
## 0.281 0.235 0.241 0.123 0.163 0.136 0.130 0.197 0.145 0.131 0.146 0.152 0.212 
##  X157  X158  X159  X160  X161  X162  X163  X164  X165  X166  X167  X168  X169 
## 0.182 0.238 0.149 0.125 0.169 0.156 0.199 0.199 0.141 0.237 0.172 0.175 0.139 
##  X170  X171  X172  X173  X174  X175  X176  X177  X178  X179  X180  X181  X182 
## 0.139 0.169 0.134 0.136 0.184 0.206 0.228 0.172 0.139 0.160 0.132 0.230 0.131 
##  X183  X184  X185  X186  X187  X188  X189  X190  X191  X192  X193  X194  X195 
## 0.130 0.168 0.170 0.155 0.149 0.157 0.177 0.172 0.095 0.139 0.146 0.121 0.212 
##  X196  X197  X198  X199  X200 
## 0.173 0.196 0.179 0.155 0.159
tmp.clustering.weighted.thr0p1$CCi
##    X1    X2    X3    X4    X5    X6    X7    X8    X9   X10   X11   X12   X13 
## 0.447 0.362 0.435 0.431 0.754 0.436 0.296 0.361 0.359 0.349 0.413 0.379 0.404 
##   X14   X15   X16   X17   X18   X19   X20   X21   X22   X23   X24   X25   X26 
## 0.271 0.471 0.269 0.330 0.259 0.326 0.208 0.458 0.278 0.509 0.523 0.561 0.690 
##   X27   X28   X29   X30   X31   X32   X33   X34   X35   X36   X37   X38   X39 
## 0.609 0.510 0.621 0.519 0.462 0.594 0.260 0.205 0.377 0.319 0.447 0.422 0.370 
##   X40   X41   X42   X43   X44   X45   X46   X47   X48   X49   X50   X51   X52 
## 0.422 0.462 0.562 0.373 0.348 0.205 0.380 0.378 0.357 0.307 0.344 0.401 0.327 
##   X53   X54   X55   X56   X57   X58   X59   X60   X61   X62   X63   X64   X65 
## 0.285 0.547 0.470 0.549 0.455 0.490 0.383 0.471 0.510 0.430 0.366 0.000 0.381 
##   X66   X67   X68   X69   X70   X71   X72   X73   X74   X75   X76   X77   X78 
## 0.169 0.230 0.331 0.258 0.335 0.392 0.373 0.254 0.445 0.580 0.473 0.519 0.312 
##   X79   X80   X81   X82   X83   X84   X85   X86   X87   X88   X89   X90   X91 
## 0.224 0.403 0.250 0.684 0.334 0.000 0.252 0.339 0.368 0.219 0.000 0.340 0.524 
##   X92   X93   X94   X95   X96   X97   X98   X99  X100  X101  X102  X103  X104 
## 0.557 0.478 0.491 0.562 0.364 0.352 0.263 0.345 0.000 0.472 0.398 0.417 0.389 
##  X105  X106  X107  X108  X109  X110  X111  X112  X113  X114  X115  X116  X117 
## 0.440 0.502 0.417 0.451 0.392 0.290 0.219 0.442 0.361 0.285 0.282 0.289 0.186 
##  X118  X119  X120  X121  X122  X123  X124  X125  X126  X127  X128  X129  X130 
## 0.316 0.523 0.357 0.174 0.226 0.332 0.623 0.516 0.580 0.645 0.603 0.595 0.593 
##  X131  X132  X133  X134  X135  X136  X137  X138  X139  X140  X141  X142  X143 
## 0.547 0.615 0.608 0.590 0.429 0.348 0.550 0.417 0.608 0.411 0.345 0.465 0.517 
##  X144  X145  X146  X147  X148  X149  X150  X151  X152  X153  X154  X155  X156 
## 0.589 0.513 0.578 0.124 0.175 0.199 0.144 0.441 0.381 0.364 0.265 0.309 0.207 
##  X157  X158  X159  X160  X161  X162  X163  X164  X165  X166  X167  X168  X169 
## 0.310 0.598 0.762 0.186 0.278 0.489 0.412 0.483 0.165 0.286 0.442 0.462 0.260 
##  X170  X171  X172  X173  X174  X175  X176  X177  X178  X179  X180  X181  X182 
## 0.312 0.226 0.181 0.210 0.276 0.279 0.521 0.473 0.272 0.348 0.293 0.584 0.487 
##  X183  X184  X185  X186  X187  X188  X189  X190  X191  X192  X193  X194  X195 
## 0.239 0.338 0.532 0.639 0.252 0.446 0.232 0.349 0.173 0.208 0.268 0.327 0.565 
##  X196  X197  X198  X199  X200 
## 0.524 0.471 0.302 0.422 0.362
# compare ROI-wise clustering coefficients across the two density thresholds
plot(tmp.clustering.weighted.thr1$CCi, tmp.clustering.weighted.thr0p1$CCi)

rcorr(tmp.clustering.weighted.thr1$CCi, tmp.clustering.weighted.thr0p1$CCi)
##      x    y
## x 1.00 0.59
## y 0.59 1.00
## 
## n= 200 
## 
## 
## P
##   x  y 
## x     0
## y  0

You can see that clustering is much higher in the network that was thresholded more restrictively. Any idea/hypothesis why this could be the case?

3.2 Degree

The degree of a node quantifies the number or strength of connections that a given node has to the rest of the network (see figure below)

The degree can also be computed relatively simply as the row or column means of an adjacency matrix, which just quantifies the strength of connections that a given node has to the remaining 199 nodes. To this end, we also need to eliminate the diagonal of the adjacency matrix.

# ROI-wise weighted degree: mean connection strength of each node to all others
# work on temporary copies so the thresholded matrices stay untouched
tmp.matrix.thr1 <- current.adj.matrix.cleaned.matrix.thr1
tmp.matrix.thr0p1 <- current.adj.matrix.cleaned.matrix.thr0p1

# zero out the diagonal (self-connections) so it does not inflate the degree
# NOTE(review): colMeans still divides by all 200 columns, i.e. the zeroed
# diagonal is counted in the denominator — confirm this is intended
diag(tmp.matrix.thr1) <- 0
diag(tmp.matrix.thr0p1) <- 0

# compute degree as the column means of the adjacency matrix
tmp.degree.weighted.thr1 <- colMeans(tmp.matrix.thr1, na.rm = TRUE)
tmp.degree.weighted.thr0p1 <- colMeans(tmp.matrix.thr0p1, na.rm = TRUE)

# compare ROI-wise degree across the two density thresholds
plot(tmp.degree.weighted.thr1, tmp.degree.weighted.thr0p1)

rcorr(tmp.degree.weighted.thr1, tmp.degree.weighted.thr0p1)
##      x    y
## x 1.00 0.82
## y 0.82 1.00
## 
## n= 200 
## 
## 
## P
##   x  y 
## x     0
## y  0

3.3 Community strength

We can also compute the degree of a node to ROIs of its own network, vs. to ROIs outside of the network. An example of this would be how strongly the posterior-cingulate cortex (i.e. a typical hub of the Default-Mode Network) is connected to the remaining DMN nodes, vs. everything outside of the DMN.

To compute the connectivity of a given node to members of its own network vs. members of other networks, we first need to know which network a given node belongs to. To this end, we can use the community vector.

# load community vector (one network index, 1-7, per ROI of the Schaefer-200 atlas)
Community <- read.table(paste0(dir.root, "Atlas/Schaefer2018_200Parcels_7Networks_order_numeric.txt"))
# map the numeric network indices to network names (plyr::mapvalues)
Community$names <- mapvalues(Community$V1, 
                             from=c(1,2,3,4,5,6,7), 
                             to=c("Visual", "Motor", "DAN", "VAN", "Limbic", "FPCN", "DMN"))

# check the number of ROIs assigned to each network
table(Community$names)
## 
##    DAN    DMN   FPCN Limbic  Motor    VAN Visual 
##     26     46     30     12     35     22     29

Next, lets compute the community strength. We need three input arguments, A = the connectivity matrix comm = the community vector (i.e. which network does a node belong to) weighted = a boolean (TRUE/FALSE), stating whether the network is weighted or not

# first, we need to replace the diagonal (self-connections) with a zero;
# work on a copy so the thresholded matrix stays untouched
tmp.matrix = current.adj.matrix.cleaned.matrix.thr1
diag(tmp.matrix) = 0

# strength of each node's connections to members of its own network,
# aggregated per network
tmp.comm.str.within = comm.str(A = tmp.matrix,
         comm = Community$names,
         weighted = T,
         measure = c("within"))
tmp.comm.str.within
##      DAN      DMN     FPCN   Limbic    Motor      VAN   Visual 
## 1033.352 1383.617  980.124  378.626 1290.076  739.307 1018.256
# strength of each node's connections to nodes OUTSIDE its own network
tmp.comm.str.between = comm.str(A = tmp.matrix,
         comm = Community$names,
         weighted = T,
         measure = c("between"))
tmp.comm.str.between
##      DAN      DMN     FPCN   Limbic    Motor      VAN   Visual 
## 5790.007 5439.741 5843.235 6444.733 5533.282 6084.052 5805.103